Abstract
Here, we filter the PSM-level PD output, with thresholds informed by missing values, notch prominence and observed fold changes vs ground truths.
Load libraries
#### Load packages ####
library(camprotR)
library(tidyverse)
library(MSnbase)
library(biobroom)
library(gtools)
Read in the PSM data
# Read the previously saved PSM-level results object from the results directory
psm_res <- readRDS(file='../results/psm_res.rds')
Plotting the proportion of missing values
# For every dataset in psm_res, draw the camprotR missing-value plots
# (overall and per sample) for all PSMs and for each species separately.
lapply(names(psm_res), function(dataset_name){
  full_set <- psm_res[[dataset_name]]
  feature_species <- fData(full_set)$species

  subsets <- list(
    'All'=full_set,
    'H.sapiens'=full_set[feature_species=='H.sapiens'],
    'S.cerevisiae'=full_set[feature_species=='S.cerevisiae'])

  for(subset_name in names(subsets)){
    plot_title <- sprintf('%s - %s', dataset_name, subset_name)
    print(plot_missing_SN(subsets[[subset_name]]) + ggtitle(plot_title))
    print(plot_missing_SN_per_sample(subsets[[subset_name]]) + ggtitle(plot_title))
  }

  # Plots are emitted as a side effect; nothing useful to return
  NULL
})
[[1]]
NULL
[[2]]
NULL
OK, so essentially all the missing values are restricted to PSMs with low (<20) Signal:Noise ratios.
# get_quant_vs_mean() is defined in the sourced helper script
source('../R/get_quant_vs_mean.R')

# Apply the helper to every dataset in psm_res (one result per list element)
quant_vs_mean <- lapply(psm_res, get_quant_vs_mean)

# Distribution of Delta score and isolation interference for the 'AGC: 5E4' data
plot(density(quant_vs_mean$`AGC: 5E4`$Delta.Score))
plot(density(quant_vs_mean$`AGC: 5E4`$Isolation.Interference.in.Percent))
# For every dataset, tally unique ids per species and bin combination, then
# draw bar plots over the interference, Signal/Noise and Delta score bins.
lapply(names(quant_vs_mean), function(dataset_name){
  bin_counts <- quant_vs_mean[[dataset_name]] %>%
    select(id, species, binned_average_sn, binned_interference, binned_delta) %>%
    unique() %>%
    group_by(species, binned_average_sn, binned_interference, binned_delta) %>%
    tally()

  bar_plot <- ggplot(bin_counts, aes(binned_interference, n)) +
    geom_bar(stat='identity') +
    facet_wrap(~species, scales='free') +
    theme_camprot(base_size=15) +
    theme(axis.text.x=element_text(angle=45, vjust=1, hjust=1)) +
    ggtitle(dataset_name)

  print(bar_plot)
  # Same plot with the x aesthetic swapped to the other binned variables
  print(bar_plot + aes(binned_average_sn) + xlab('Signal/Noise'))
  print(bar_plot + aes(binned_delta) + xlab('Delta score'))

  NULL
})
[[1]]
NULL
[[2]]
NULL
Define the expected ratios from the experimental design
# Unique condition / spike-in rows taken from the sample annotation of the
# 'AGC: 2E5' dataset; the yeast and human columns are renamed to species names
exp_design <- unique(
  select(pData(psm_res[['AGC: 2E5']]),
         condition, S.cerevisiae=yeast, H.sapiens=human))

# Spike-in values per condition, used to derive the expected ratios
sc_spikes <- exp_design[['S.cerevisiae']]
hs_spikes <- exp_design[['H.sapiens']]
# Expected ratios for one pairwise comparison of conditions.
#
# sc_spikes, hs_spikes: per-condition spike-in values for each species.
# ix_1, ix_2: indices of the denominator (ix_1) and numerator (ix_2) conditions.
#
# Returns a character vector: the comparison label built from the S. cerevisiae
# spike values ('<ix_2 value> vs <ix_1 value>'), then the H. sapiens ratio and
# the S. cerevisiae ratio (numbers are coerced to character by c()).
get_ground_truth <- function(sc_spikes, hs_spikes, ix_1, ix_2){
  # Element 1 is the numerator condition, element 2 the denominator
  sc_pair <- sc_spikes[c(ix_2, ix_1)]
  hs_pair <- hs_spikes[c(ix_2, ix_1)]

  label <- sprintf('%s vs %s', sc_pair[1], sc_pair[2])

  c(label, hs_pair[1]/hs_pair[2], sc_pair[1]/sc_pair[2])
}
# Build the table of expected ratios for every ordered pair of the 3 conditions.
# Each get_ground_truth() call returns c(comparison, H.sapiens, S.cerevisiae) as
# character, so the ratio columns are converted back to numeric before reshaping
# to long format (one row per comparison and species).
expected <- apply(permutations(n=3,r=2), 1, function(x){
  get_ground_truth(sc_spikes, hs_spikes, x[1], x[2])
}) %>%
  t() %>%
  # Keep strings as character so as.numeric() parses the values rather than
  # returning factor codes on R < 4.0
  data.frame(stringsAsFactors=FALSE) %>%
  setNames(c('comparison', 'H.sapiens', 'S.cerevisiae')) %>%
  # across() replaces the deprecated mutate_at(..., funs(as.numeric))
  mutate(across(c(S.cerevisiae, H.sapiens), as.numeric)) %>%
  pivot_longer(-comparison, names_to='species', values_to='expected')
`funs()` is deprecated as of dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
print(expected)

# Comparisons in which the expected S. cerevisiae ratio is greater than 1
positive_comparisons <- pull(
  filter(expected, species=='S.cerevisiae', expected>1),
  comparison)
Visualising how interference and tag intensity affect observed fold changes
# For every dataset, plot densities of 'diff' for S. cerevisiae rows in the
# positive comparisons, with a dashed line at log2 of the expected ratio.
# The same base plot is re-coloured / re-faceted by the other binned variables.
lapply(names(quant_vs_mean), function(dataset_name){
  # Reference lines: expected yeast ratios for the positive comparisons
  sc_expected <- expected[(expected$species=='S.cerevisiae' &
                             expected$comparison %in% positive_comparisons),]

  plot_data <- filter(
    quant_vs_mean[[dataset_name]],
    Isolation.Interference.in.Percent<=50, # no need to consider interference>=50%
    comparison %in% positive_comparisons,
    species=='S.cerevisiae',
    !below_notch)

  density_plot <- ggplot(plot_data, aes(diff, colour=binned_intensity)) +
    geom_density() +
    theme_camprot(base_size=15) +
    theme(axis.text.x=element_text(angle=45, vjust=1, hjust=1)) +
    facet_grid(binned_interference~comparison, scales='free') +
    geom_vline(aes(xintercept=log2(expected)),
               data=sc_expected,
               colour=get_cat_palette(1), linetype=2) +
    ylab('Density') +
    xlab('Difference in intensity') +
    xlim(-6,3) +
    ggtitle(dataset_name) +
    scale_colour_discrete(name='Intensity')

  print(density_plot)

  # Colour by interference, facet rows by average S/N bin
  print(density_plot +
          aes(colour=binned_interference) +
          facet_grid(binned_average_sn~comparison) +
          scale_colour_discrete(name='Interference (%)'))

  # Colour by interference, facet rows by Delta score bin
  print(density_plot +
          aes(colour=binned_interference) +
          facet_grid(binned_delta~comparison) +
          scale_colour_discrete(name='Interference (%)'))

  # Colour by Delta score bin, one panel per comparison, zoomed on x
  print(density_plot +
          aes(colour=binned_delta) +
          facet_wrap(~comparison) +
          coord_cartesian(xlim=c(-2, 3)) +
          scale_colour_manual(name='Delta score', values=get_cat_palette(7)))

  NULL
})
[[1]]
NULL
[[2]]
NULL
# Delta score against 'diff' for the 'AGC: 5E4' data: S. cerevisiae rows in the
# positive comparisons, not below the notch, with a smoothed trend and a dashed
# line at log2 of the expected ratio.
ggplot(
  filter(quant_vs_mean[['AGC: 5E4']],
         Isolation.Interference.in.Percent<=50, # no need to consider interference>=50%
         comparison %in% positive_comparisons,
         species=='S.cerevisiae',
         !below_notch),
  aes(Delta.Score, diff)) +
  geom_point(size=0.1, alpha=0.1) +
  geom_smooth() +
  theme_camprot(base_size=15) +
  theme(axis.text.x=element_text(angle=45, vjust=1, hjust=1)) +
  facet_grid(~comparison, scales='free') +
  geom_hline(aes(yintercept=log2(expected)),
             data=expected[(expected$species=='S.cerevisiae' &
                              expected$comparison %in% positive_comparisons),],
             colour=get_cat_palette(1), linetype=2) +
  ylab('Fold change (log2)') +
  xlab('Delta scores')
Summarise the fold changes to compare median fold changes over bins of tag intensity and interference
# For every dataset, summarise the '6 vs 1' S. cerevisiae fold changes per
# interference x intensity bin and draw two heatmaps: the median observed fold
# change (back-transformed with 2^) and the number of rows in each bin.
quant_vs_mean %>% names() %>% lapply(function(x){
  p <- quant_vs_mean[[x]] %>%
    filter(Isolation.Interference.in.Percent<=60) %>% # no need to consider interference>60%
    filter(species=='S.cerevisiae', !below_notch, comparison=='6 vs 1') %>%
    group_by(binned_interference, binned_intensity) %>%
    # .groups='drop' avoids the "regrouping output" message; grouping is not
    # needed once the summary exists
    summarise(median_diff=2^median(diff, na.rm=TRUE), n=n(), .groups='drop') %>%
    ggplot(aes(binned_interference, binned_intensity, fill=median_diff)) +
    geom_tile(colour='grey') +
    theme_camprot(base_size=15) +
    scale_fill_gradient(high=get_cat_palette(2)[2],
                        low='white',
                        limits=c(0, 6), name='Observed\nfold change') +
    theme(axis.text.x=element_text(angle=45, vjust=1, hjust=1)) +
    xlab('Binned interference') +
    ylab('Binned intensity') +
    ggtitle(x)

  print(p + geom_text(aes(label=round(median_diff, 1)), size=3))

  # Same tiles, filled and labelled by the number of rows per bin
  print(p +
          aes(fill=n) +
          scale_fill_gradient(high=get_cat_palette(3)[3],
                              low='white') +
          geom_text(aes(label=n), size=3))

  # Return NULL so the last plot is not rendered a second time when the
  # list returned by lapply() is auto-printed
  return(NULL)
})
`summarise()` regrouping output by 'binned_interference' (override with `.groups` argument)
[[1]]
[[2]]
Repeat the above, but also split by Delta
# As the interference x intensity heatmaps, but additionally grouped by Delta
# score bin, with one facet per Delta bin.
quant_vs_mean %>% names() %>% lapply(function(x){
  p <- quant_vs_mean[[x]] %>%
    filter(Isolation.Interference.in.Percent<=60) %>% # no need to consider interference>60%
    filter(species=='S.cerevisiae', !below_notch, comparison=='6 vs 1') %>%
    group_by(binned_interference, binned_intensity, binned_delta) %>%
    # .groups='drop' avoids the "regrouping output" message; grouping is not
    # needed once the summary exists
    summarise(median_diff=2^median(diff, na.rm=TRUE), n=n(), .groups='drop') %>%
    ggplot(aes(binned_interference, binned_intensity, fill=median_diff)) +
    geom_tile(colour='grey') +
    theme_camprot(base_size=10) +
    scale_fill_gradient(high=get_cat_palette(2)[2],
                        low='white',
                        limits=c(0, 6), name='Observed\nfold change') +
    theme(axis.text.x=element_text(angle=45, vjust=1, hjust=1)) +
    xlab('Binned interference') +
    ylab('Binned intensity') +
    ggtitle(x) +
    facet_wrap(~binned_delta)

  print(p + geom_text(aes(label=round(median_diff, 1)), size=3))

  # Same tiles, filled and labelled by the number of rows per bin
  print(p +
          aes(fill=n) +
          scale_fill_gradient(high=get_cat_palette(3)[3],
                              low='white') +
          geom_text(aes(label=n), size=3))

  # Return NULL so the last plot is not rendered a second time when the
  # list returned by lapply() is auto-printed
  return(NULL)
})
`summarise()` regrouping output by 'binned_interference', 'binned_intensity' (override with `.groups` argument)
[[1]]
[[2]]
Let’s check how isolation interference and delta interact with tag intensity to impact the PSM-level fold change estimates. We will just focus on PSMs with interference <= 60%.
# For every dataset, plot log2 tag intensity against 'diff' for S. cerevisiae
# rows in the non-positive comparisons, with dashed lines at log2(expected).
# Five views: raw points, overall smooth, smooth per interference bin,
# smooth faceted by Delta bin, and smooth per Delta bin.
lapply(names(quant_vs_mean), function(dataset_name){
  sc_expected <- filter(expected,
                        species=='S.cerevisiae',
                        !comparison %in% positive_comparisons)

  # Don't want to consider interference > 60%
  plot_data <- filter(quant_vs_mean[[dataset_name]],
                      species=='S.cerevisiae',
                      Isolation.Interference.in.Percent<=60,
                      !comparison %in% positive_comparisons)

  # Base plot holds only the reference lines; points/smooths are added per view
  base_plot <- ggplot(plot_data, aes(log2(intensity), diff)) +
    theme_camprot(base_size=12) +
    facet_wrap(~comparison, scales='free_y') +
    geom_hline(aes(yintercept=log2(expected)),
               data=sc_expected,
               colour='black', linetype=2) +
    xlab('Tag intensity (log2)') +
    ylab('Difference in intensity (log2)') +
    ggtitle(dataset_name)

  dark_points <- geom_point(size=0.1, alpha=0.1)
  grey_points <- geom_point(size=0.1, alpha=0.1, colour='grey80')

  print(base_plot + dark_points)

  print(base_plot + dark_points + geom_smooth(se=FALSE, size=0.5))

  print(base_plot + grey_points +
          geom_smooth(aes(colour=binned_interference), se=FALSE, size=0.5) +
          scale_colour_manual(values=c(get_cat_palette(6)),
                              name='Isolation interference (%)'))

  print(base_plot + grey_points +
          geom_smooth(se=FALSE, size=0.5) +
          facet_grid(binned_delta~comparison))

  print(base_plot + grey_points +
          geom_smooth(aes(colour=binned_delta), se=FALSE, size=0.5) +
          scale_colour_manual(values=c(get_cat_palette(7)),
                              name='Delta score'))

  NULL
})
[[1]]
NULL
[[2]]
NULL
Based on the above, I’m going to use the following range of thresholds: - Delta >= [0, 0.2, 0.5] - Isolation interference <= [10%, 50%, 100%] - Signal/Noise >= [0, 10, 100]
OK, so as we expect, the observed fold changes tend towards the truth as tag intensity increases. At very high tag intensities, there is an issue with more PSMs being false identifications, so what we are observing is the ratio from a human protein, hence the ‘uptick’ in ratios for e.g. ‘1 vs 6’ at high intensities. We also observe a clear underestimate of ratios when the tag intensity is below the notch.
We observe that this ‘uptick’ only occurs when Delta score < 0.5. This fits with the expectation that these PSMs are actually from human peptides, but the rank 1 peptide is a yeast peptide, likely an ortholog.
We also observe that in the range of tag intensities where the ratio is close to the truth (~2^3 - 2^7), the observed ratio is closer to the truth with low interference (<20%)
Let’s plot the tag intensity vs difference in intensity for all interference thresholds
# Maximum isolation interference (%) values to compare
interference_thresholds <- c(10, 50, 100)

# One plot per dataset and interference threshold: log2 tag intensity vs 'diff'
# for the non-positive comparisons, faceted by species and comparison
for(x in names(quant_vs_mean)){
  for(int_threshold in interference_thresholds){

    tmp_data <- filter(quant_vs_mean[[x]],
                       species!='mixed',
                       !comparison %in% positive_comparisons,
                       Isolation.Interference.in.Percent<=int_threshold)

    p <- ggplot(tmp_data, aes(log2(intensity), diff)) +
      geom_point(size=0.05, alpha=0.05, colour='grey10') +
      geom_density2d(size=0.3, colour=get_cat_palette(2)[2]) +
      theme_camprot(base_size=12) +
      facet_grid(species~comparison, scales='free_y') +
      # Dashed reference lines at log2 of the expected ratios
      geom_hline(aes(yintercept=log2(expected)),
                 data=expected[!expected$comparison %in% positive_comparisons,],
                 colour='black', linetype=2) +
      xlab('Tag intensity (log2)') +
      ylab('Difference in intensity (log2)') +
      coord_cartesian(ylim=c(-4,4))

    print(p + ggtitle(sprintf('%s - Interference <= %s', x, int_threshold)))
  }
}
Now, let’s filter the PSMs against the interference and average S/N thresholds +/- notch filtering.
# Threshold values to combine
delta_thresholds <- c(0, 0.2, 0.5)
interference_thresholds <- c(10, 50, 100)
sn_thresholds <- c(0, 10, 100)
notch_thresholded <- c(TRUE, FALSE)

# All combinations of the four settings, as a list with one vector per
# combination. Transposing coerces everything to numeric, so the notch flag is
# stored as 0/1.
combinations <- crossing(delta_thresholds,
                         interference_thresholds,
                         sn_thresholds,
                         notch_thresholded) %>%
  t() %>%
  as.data.frame() %>%
  as.list()

# Name each combination 'delta, interference, sn, notch', e.g. '0.5, 10, 0, 0'
names(combinations) <- vapply(combinations, FUN=function(x){
  sprintf('%s, %s, %s, %s', x[1], x[2], x[3], x[4])
}, FUN.VALUE=character(1), USE.NAMES=FALSE)
# Filter every dataset in psm_res with every threshold combination.
# Result: a list (per dataset) of lists (per combination name) of filtered objects.
psm_res_flt <- psm_res %>% lapply(function(x){

  # Normalise on the log2 scale, then return the intensities to the linear scale
  x <- x %>% log(base=2) %>% MSnbase::normalise(method='diff.median')
  exprs(x) <- 2^exprs(x)

  combinations %>% lapply(function(thresholds){

    delta_threshold <- thresholds[1]
    interference_threshold <- thresholds[2]
    sn_threshold <- thresholds[3]
    # The notch flag is stored as 0/1 in `combinations`
    notch <- as.logical(thresholds[4])

    # The message states the comparisons actually applied: Delta score >=
    # threshold, and S/N as a minimum (a threshold of 0 keeps PSMs)
    message(sprintf(paste0('Thresholding with delta >= %s, co-isolation <= %s, ',
                           'average S/N >= %s, notch PSMs removed: %s'),
                    delta_threshold, interference_threshold,
                    sn_threshold, as.character(notch)))

    out <- filter_TMT_PSMs(x, inter_thresh=interference_threshold,
                           sn_thresh=sn_threshold, verbose=FALSE)

    out <- out[fData(out)$Delta.Score>=delta_threshold,]

    if(notch){
      # Intensities are on the linear scale here (see 2^exprs above), so the
      # notch upper bound is 5.75, not log2(5.75).
      # NOTE(review): the bound is applied to normalised intensities - confirm
      # this is intended rather than filtering on the raw values.
      min_intensity <- apply(exprs(out), 1, function(x) min(x, na.rm=TRUE))
      out <- out[min_intensity>5.75,]
    }

    out
  })
})
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 10, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 50, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0, co-isolation <= 100, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 10, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 50, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.2, co-isolation <= 100, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 10, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 50, average S/N <= 100, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 0, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 0, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 10, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 10, notch PSMs removed: TRUE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 100, notch PSMs removed: FALSE
Thresholding with delta > 0.5, co-isolation <= 100, average S/N <= 100, notch PSMs removed: TRUE
# Size of one filtered object: 'AGC: 5E4' data, combination '0.5, 10, 0, 0'
dim(psm_res_flt[['AGC: 5E4']][['0.5, 10, 0, 0']])
[1] 19975 10
# Per dataset: log2 reporter ion intensity vs 'diff' for the non-positive
# comparisons, restricted to interference <= 10% and Delta score >= 0.5.
# The untitled plot is saved to file; a titled copy is printed.
for(x in names(quant_vs_mean)){

  tmp_data <- filter(quant_vs_mean[[x]],
                     species!='mixed',
                     !comparison %in% positive_comparisons,
                     Isolation.Interference.in.Percent<=10,
                     Delta.Score>=0.5)

  # The smooth uses all H.sapiens rows but only S.cerevisiae rows with
  # intensity below 2^6.5
  tmp_data_for_smooth <- filter(
    tmp_data,
    species=='H.sapiens' | ((species=='S.cerevisiae' & intensity < 2^6.5)))

  p <- ggplot(tmp_data, aes(log2(intensity), diff)) +
    geom_point(size=0.05, alpha=0.05, colour='grey10') +
    geom_smooth(data=tmp_data_for_smooth, aes(log2(intensity), diff)) +
    theme_camprot(base_size=12) +
    facet_grid(species~comparison, scales='free_y') +
    geom_hline(aes(yintercept=log2(expected)),
               data=expected[!expected$comparison %in% positive_comparisons,],
               colour='grey', linetype=2) +
    xlab('Reporter ion intensity (log2)') +
    ylab('Difference in intensity (log2)') +
    coord_cartesian(ylim=c(-5,3))

  # File name is the dataset name with ':' and spaces removed
  ggsave(sprintf('../results/plots/%s_psm_fold_changes.png', gsub('[: ]', '', x)), p)

  print(p + ggtitle(x))
}
Saving 7 x 7 in image
# Threshold combinations (element names within psm_res_flt) for which to plot
# intensities, notch per protein and missing values
datasets <- list('0, 100, 0, 0', '0.5, 10, 0, 0')
#datasets <- list(c('0.5, 10, 0, 0'))

# For each dataset and selected combination, draw the camprotR notch plots
# (overall and faceted by sample) for all PSMs and for each species
lapply(names(psm_res_flt), function(agc){
  lapply(datasets, function(thresholds){
    print(thresholds)

    all <- psm_res_flt[[agc]][[thresholds]]
    print(all)

    feature_species <- fData(all)$species
    slices <- list('All'=all,
                   'H.sapiens'=all[feature_species=='H.sapiens'],
                   'S.cerevisiae'=all[feature_species=='S.cerevisiae'])

    # Plots are only written to file for the 'AGC: 5E4' data. File names depend
    # on the slice alone, so each combination overwrites the previous files.
    save_plots <- agc=='AGC: 5E4'

    for(slice in names(slices)){
      plot_title <- sprintf('%s\n%s\n%s', agc, thresholds, slice)

      p <- plot_TMT_notch(slices[[slice]]) +
        xlab('Reporter ion intensity (log2)') +
        ggtitle(plot_title)
      print(p)

      p2 <- plot_TMT_notch(slices[[slice]], facet_by_sample=TRUE) +
        xlab('Reporter ion intensity (log2)') +
        ggtitle(plot_title)
      print(p2)

      if(save_plots){
        ggsave(sprintf('../results/plots/%s_ion_intensities.png', slice), p)
        ggsave(sprintf('../results/plots/%s_ion_intensities_per_tag.png', slice), p2)
      }
    }

    NULL
  })

  NULL
})
[1] "0, 100, 0, 0"
MSnSet (storageMode: lockedEnvironment)
assayData: 99650 features, 10 samples
element names: exprs
protocolData: none
phenoData
sampleNames: 126 127N ... 131 (10 total)
varLabels: sample_name condition ... human (5 total)
varMetadata: labelDescription
featureData
featureNames: 1 2 ... 100119 (99650 total)
fvarLabels: Protein.Accessions PSMs.Workflow.ID ... species (42 total)
fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
Annotation:
- - - Processing information - - -
Log transformed (base 2) Tue Nov 10 21:19:07 2020
Normalised (diff.median): Tue Nov 10 21:19:07 2020
Subset [100119,10][99650,10] Tue Nov 10 21:19:12 2020
Subset [99650,10][99650,10] Tue Nov 10 21:19:12 2020
Subset [99650,10][99650,10] Tue Nov 10 21:19:12 2020
Subset [99650,10][99650,10] Tue Nov 10 21:19:12 2020
MSnbase version: 2.14.2
[1] "0, 100, 0, 0"
MSnSet (storageMode: lockedEnvironment)
assayData: 109197 features, 10 samples
element names: exprs
protocolData: none
phenoData
sampleNames: 126 127N ... 131 (10 total)
varLabels: sample_name condition ... human (5 total)
varMetadata: labelDescription
featureData
featureNames: 1 2 ... 109779 (109197 total)
fvarLabels: Protein.Accessions PSMs.Workflow.ID ... species (42 total)
fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
Annotation:
- - - Processing information - - -
Log transformed (base 2) Tue Nov 10 21:19:26 2020
Normalised (diff.median): Tue Nov 10 21:19:26 2020
Subset [109779,10][109197,10] Tue Nov 10 21:19:31 2020
Subset [109197,10][109197,10] Tue Nov 10 21:19:31 2020
Subset [109197,10][109197,10] Tue Nov 10 21:19:31 2020
Subset [109197,10][109197,10] Tue Nov 10 21:19:32 2020
MSnbase version: 2.14.2
[[1]]
NULL
[[2]]
NULL
Tallies for fraction sub-notch PSMs per protein
# Threshold combination for which to tally sub-notch PSMs per protein
datasets <- list(c('0.5, 10, 0, 0'))

# For each dataset and slice (all PSMs / per species): print a tally of
# proteins by sample and whether fraction_below >= 0.25, then plot a histogram
# of fraction_below (> 0 only), faceted by sample.
psm_res_flt %>% names() %>% lapply(function(agc){
  datasets %>% lapply(function(thresholds){

    all <- psm_res_flt[[agc]][[thresholds]]
    hs <- all[fData(all)$species=='H.sapiens']
    sc <- all[fData(all)$species=='S.cerevisiae']

    slices <- list('All'=all, 'H.sapiens'=hs, 'S.cerevisiae'=sc)

    for(slice in names(slices)){

      # Order the sample factor to match the column order of the data
      notch_per_protein <- get_notch_per_protein(slices[[slice]]) %>%
        mutate(sample=factor(remove_x(sample), colnames(slices[[slice]])))

      print(notch_per_protein %>%
              group_by(sample, fraction_below>=0.25) %>%
              tally())

      p <- notch_per_protein %>%
        filter(fraction_below>0) %>%
        ggplot(aes(.data$fraction_below)) +
        geom_histogram(bins = 10) +
        theme_camprot(base_size = 10) +
        facet_wrap(~sample) +
        xlab("Fraction at/below notch PSMs") +
        ylab("Proteins") +
        scale_x_continuous(breaks=seq(0,1,0.5))

      print(p + ggtitle(sprintf('%s\n%s\n%s', agc, thresholds, slice)))

      # Scalar condition, so use && rather than the elementwise &
      if(agc=='AGC: 5E4' && slice=='All'){
        ggsave('../results/plots/fraction_sub_notch.png', p)
        # The former ggsave() of 'p2' was removed: no 'p2' is created in this
        # chunk, so it stopped with "object 'p2' not found". 'p' is already
        # faceted by sample.
      }
    }

    return(NULL)
  })

  return(NULL)
})
Error in grid.draw(plot) : object 'p2' not found
Missing values frequencies.
# For each dataset and each combination in `datasets`, call plotNA() on all
# PSMs and on each species subset
lapply(names(psm_res_flt), function(agc){
  lapply(datasets, function(thresholds){
    all <- psm_res_flt[[agc]][[thresholds]]
    feature_species <- fData(all)$species

    slices <- list('All'=all,
                   'H.sapiens'=all[feature_species=='H.sapiens'],
                   'S.cerevisiae'=all[feature_species=='S.cerevisiae'])

    for(slice_data in slices){
      plotNA(slice_data, pNA = 0)
    }

    NULL
  })

  NULL
})
[[1]]
NULL
[[2]]
NULL
Save out objects for downstream notebooks
# Persist the objects that later notebooks read back in
saveRDS(quant_vs_mean, file='../results/quant_vs_mean.rds')
saveRDS(psm_res_flt, file='../results/psm_res_flt.rds')
saveRDS(expected, file='../results/expected.rds')